# Load the raw attendance CSV and standardise its column names with
# janitor::clean_names() so later steps can refer to them consistently.
theme_park <- janitor::clean_names(
  read_csv("ultimate data.csv")
)
## Rows: 920 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Park_Name, City, Country, Type, Region
## dbl (2): Year, Attendance
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Summary table: mean and total attendance for every year / venue type
# combination, leaving out rows whose region is "Worldwide".
# Note: filter() also drops rows where `region` is NA.
theme_park |>
  filter(region != "Worldwide") |>
  # Rescale attendance to units of 100,000 before aggregating; this is an
  # element-wise operation, so it does not need to run inside the grouping.
  mutate(attendance = attendance / 100000) |>
  group_by(year, type) |>
  summarise(
    mean = mean(attendance),
    sum = sum(attendance),
    # Return an ungrouped result: the table does not need the leftover
    # `year` grouping, and being explicit silences the `.groups` message.
    .groups = "drop"
  ) |>
  knitr::kable(digits = 3)
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
year type mean sum
2019 Amusement/Theme Park 542.806 37996.4
2019 Museum 335.013 20100.8
2019 Water Park 98.315 5898.9
2020 Amusement/Theme Park 186.159 13031.1
2020 Museum 77.742 4664.5
2020 Water Park 38.558 2313.5
2021 Amusement/Theme Park 320.910 22463.7
2021 Museum 107.650 6459.0
2021 Water Park 57.892 3473.5
2022 Amusement/Theme Park 425.616 21280.8
2022 Museum 193.388 11603.3
2022 Water Park 77.972 4678.3
# Box plot of attendance with one box per year. `year` is converted to a
# factor first so that it is mapped to discrete colours.
plot_ly(
  data = theme_park |>
    mutate(year = as.factor(year)) |>
    group_by(year),
  y = ~attendance,
  color = ~year,
  colors = "viridis",
  type = "box"
)
 # Scatter plot of mean attendance per region and year, leaving out rows
 # whose region is "Worldwide" (filter() also drops rows with NA region).
 theme_park |>
   filter(region != "Worldwide") |>
   group_by(region, year) |>
   # The value plotted is a mean, so the column is named accordingly;
   # `.groups = "drop"` returns an ungrouped result and silences the
   # summarise() grouping message.
   summarise(mean_attendance = mean(attendance), .groups = "drop") |>
   plot_ly(
     x = ~year,
     y = ~mean_attendance,
     color = ~region,
     type = "scatter",
     # "markers" is the valid plotly mode for drawing points; "point" is not
     # one of the accepted mode flags ("lines", "markers", "text", "none").
     mode = "markers",
     colors = "viridis"
   )
## `summarise()` has grouped output by 'region'. You can override using the
## `.groups` argument.